Initialisation et import des données

# Install only the packages that are not available yet. Calling
# install.packages() on a package that is already loaded is what produced the
# "Updating loaded packages" error on every run of this cell.
# plotly is included because it is attached later in this file.
local({
  required_pkgs <- c("tidyverse", "pacman", "dplyr", "plotly")
  is_installed <- vapply(
    required_pkgs, requireNamespace, logical(1),
    quietly = TRUE
  )
  if (!all(is_installed)) {
    install.packages(required_pkgs[!is_installed])
  }
})
library(tidyverse)

# Set the `repr.plot.width` / `repr.plot.height` options used to size plots.
#
# width:  value stored in `repr.plot.width`.
# heigth: value stored in `repr.plot.height` (parameter spelling kept as-is
#         so existing named calls keep working).
# Returns the previous values of both options, invisibly, as options() does.
fig <- function(width, heigth) {
  plot_size <- list(repr.plot.width = width, repr.plot.height = heigth)
  options(plot_size)
}

# Attach pacman, then hand the analysis packages to p_load().
library(pacman)

pacman::p_load(
  pacman, dplyr, ggplot2, rio, gridExtra,
  scales, ggcorrplot, caret, e1071
)

# Read the training data into a data frame and list its column names.
dataSet <- read.csv(file = "data/train.csv")
colnames(dataSet)
 [1] "battery_power" "blue"          "clock_speed"   "dual_sim"      "fc"            "four_g"        "int_memory"    "m_dep"         "mobile_wt"     "n_cores"       "pc"           
[12] "px_height"     "px_width"      "ram"           "sc_h"          "sc_w"          "talk_time"     "three_g"       "touch_screen"  "wifi"          "price_range"  

Affichage du set de données.

Colonnes : battery_power, blue, clock_speed, dual_sim, fc, four_g, int_memory, m_dep, mobile_wt, n_cores, pc, px_height, px_width, ram, sc_h, sc_w, talk_time, three_g, touch_screen, wifi, price_range

- battery_power: total energy a battery can store at one time, measured in mAh
- blue: has Bluetooth or not
- clock_speed: speed at which the microprocessor executes instructions
- dual_sim: has dual SIM support or not
- fc: front camera megapixels
- four_g: has 4G or not
- int_memory: internal memory in gigabytes
- m_dep: mobile depth in cm
- mobile_wt: weight of the mobile phone
- n_cores: number of processor cores
- pc: primary camera megapixels
- px_height: pixel resolution height
- px_width: pixel resolution width
- ram: random access memory in megabytes
- sc_h: screen height of the mobile in cm
- sc_w: screen width of the mobile in cm
- talk_time: longest time that a single battery charge will last when you are talking
- three_g: has 3G or not
- touch_screen: has a touch screen or not
- wifi: has Wi-Fi or not
- price_range: the target variable, with values 0 (low cost), 1 (medium cost), 2 (high cost) and 3 (very high cost)

# Number of rows and columns of the data frame
dim(dataSet)
[1] 2000   21
# Class of the dataSet object
class(dataSet)
[1] "data.frame"
# Preview the first rows, then report the class of every column
head(dataSet)
sapply(dataSet, class)
battery_power          blue   clock_speed      dual_sim            fc        four_g    int_memory         m_dep     mobile_wt       n_cores            pc     px_height      px_width 
    "integer"     "integer"     "numeric"     "integer"     "integer"     "integer"     "integer"     "numeric"     "integer"     "integer"     "integer"     "integer"     "integer" 
          ram          sc_h          sc_w     talk_time       three_g  touch_screen          wifi   price_range 
    "integer"     "integer"     "integer"     "integer"     "integer"     "integer"     "integer"     "integer" 
# Per-column summary statistics (min, quartiles, median, mean, max)
summary(dataSet)
 battery_power         blue        clock_speed       dual_sim            fc             four_g         int_memory        m_dep          mobile_wt        n_cores            pc        
 Min.   : 501.0   Min.   :0.000   Min.   :0.500   Min.   :0.0000   Min.   : 0.000   Min.   :0.0000   Min.   : 2.00   Min.   :0.1000   Min.   : 80.0   Min.   :1.000   Min.   : 0.000  
 1st Qu.: 851.8   1st Qu.:0.000   1st Qu.:0.700   1st Qu.:0.0000   1st Qu.: 1.000   1st Qu.:0.0000   1st Qu.:16.00   1st Qu.:0.2000   1st Qu.:109.0   1st Qu.:3.000   1st Qu.: 5.000  
 Median :1226.0   Median :0.000   Median :1.500   Median :1.0000   Median : 3.000   Median :1.0000   Median :32.00   Median :0.5000   Median :141.0   Median :4.000   Median :10.000  
 Mean   :1238.5   Mean   :0.495   Mean   :1.522   Mean   :0.5095   Mean   : 4.309   Mean   :0.5215   Mean   :32.05   Mean   :0.5018   Mean   :140.2   Mean   :4.521   Mean   : 9.916  
 3rd Qu.:1615.2   3rd Qu.:1.000   3rd Qu.:2.200   3rd Qu.:1.0000   3rd Qu.: 7.000   3rd Qu.:1.0000   3rd Qu.:48.00   3rd Qu.:0.8000   3rd Qu.:170.0   3rd Qu.:7.000   3rd Qu.:15.000  
 Max.   :1998.0   Max.   :1.000   Max.   :3.000   Max.   :1.0000   Max.   :19.000   Max.   :1.0000   Max.   :64.00   Max.   :1.0000   Max.   :200.0   Max.   :8.000   Max.   :20.000  
   px_height         px_width           ram            sc_h            sc_w          talk_time        three_g        touch_screen        wifi        price_range  
 Min.   :   0.0   Min.   : 500.0   Min.   : 256   Min.   : 5.00   Min.   : 0.000   Min.   : 2.00   Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.00  
 1st Qu.: 282.8   1st Qu.: 874.8   1st Qu.:1208   1st Qu.: 9.00   1st Qu.: 2.000   1st Qu.: 6.00   1st Qu.:1.0000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.75  
 Median : 564.0   Median :1247.0   Median :2146   Median :12.00   Median : 5.000   Median :11.00   Median :1.0000   Median :1.000   Median :1.000   Median :1.50  
 Mean   : 645.1   Mean   :1251.5   Mean   :2124   Mean   :12.31   Mean   : 5.767   Mean   :11.01   Mean   :0.7615   Mean   :0.503   Mean   :0.507   Mean   :1.50  
 3rd Qu.: 947.2   3rd Qu.:1633.0   3rd Qu.:3064   3rd Qu.:16.00   3rd Qu.: 9.000   3rd Qu.:16.00   3rd Qu.:1.0000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:2.25  
 Max.   :1960.0   Max.   :1998.0   Max.   :3998   Max.   :19.00   Max.   :18.000   Max.   :20.00   Max.   :1.0000   Max.   :1.000   Max.   :1.000   Max.   :3.00  
library(ggcorrplot)
# Set the figure size first: changing the plot-size options after the plot
# has been drawn cannot affect it.
fig(18, 16)
# Pairwise correlation matrix of all columns (every column is numeric at
# this point), rounded to 8 decimals, shown as a heatmap.
corr <- round(cor(dataSet), 8)
ggcorrplot(corr)
# Compact structure overview: type and first values of every column
str(dataSet)
'data.frame':   2000 obs. of  21 variables:
 $ battery_power: int  842 1021 563 615 1821 1859 1821 1954 1445 509 ...
 $ blue         : int  0 1 1 1 1 0 0 0 1 1 ...
 $ clock_speed  : num  2.2 0.5 0.5 2.5 1.2 0.5 1.7 0.5 0.5 0.6 ...
 $ dual_sim     : int  0 1 1 0 0 1 0 1 0 1 ...
 $ fc           : int  1 0 2 0 13 3 4 0 0 2 ...
 $ four_g       : int  0 1 1 0 1 0 1 0 0 1 ...
 $ int_memory   : int  7 53 41 10 44 22 10 24 53 9 ...
 $ m_dep        : num  0.6 0.7 0.9 0.8 0.6 0.7 0.8 0.8 0.7 0.1 ...
 $ mobile_wt    : int  188 136 145 131 141 164 139 187 174 93 ...
 $ n_cores      : int  2 3 5 6 2 1 8 4 7 5 ...
 $ pc           : int  2 6 6 9 14 7 10 0 14 15 ...
 $ px_height    : int  20 905 1263 1216 1208 1004 381 512 386 1137 ...
 $ px_width     : int  756 1988 1716 1786 1212 1654 1018 1149 836 1224 ...
 $ ram          : int  2549 2631 2603 2769 1411 1067 3220 700 1099 513 ...
 $ sc_h         : int  9 17 11 16 8 17 13 16 17 19 ...
 $ sc_w         : int  7 3 2 8 2 1 8 3 1 10 ...
 $ talk_time    : int  19 7 9 11 15 10 18 5 20 12 ...
 $ three_g      : int  0 1 1 1 1 1 1 1 1 1 ...
 $ touch_screen : int  0 1 1 0 1 0 0 1 0 0 ...
 $ wifi         : int  1 0 0 0 0 0 1 1 0 0 ...
 $ price_range  : int  1 2 2 2 1 1 3 0 0 0 ...
prop.table(table(dataSet$blue)) # proportion of rows per `blue` value (fractions summing to 1, not percentages)

    0     1 
0.505 0.495 
prop.table(table(dataSet$dual_sim)) # proportion of rows per `dual_sim` value (fractions summing to 1, not percentages)

     0      1 
0.4905 0.5095 
prop.table(table(dataSet$four_g)) # proportion of rows per `four_g` value (fractions summing to 1, not percentages)

     0      1 
0.4785 0.5215 
prop.table(table(dataSet$three_g)) # proportion of rows per `three_g` value (fractions summing to 1, not percentages)

     0      1 
0.2385 0.7615 
prop.table(table(dataSet$touch_screen)) # proportion of rows per `touch_screen` value (fractions summing to 1, not percentages)

    0     1 
0.497 0.503 
prop.table(table(dataSet$wifi)) # proportion of rows per `wifi` value (fractions summing to 1, not percentages)

    0     1 
0.493 0.507 

Distribution of screen height and width (the two columns are stacked into one long data frame; no rows are filtered out)

library(ggplot2)
# Stack screen height and width into one long data frame: one row per
# measurement, labelled with the dimension it came from.
data <- data.frame(
  Dimensions_in_cm = c(dataSet$sc_h, dataSet$sc_w),
  Screen = rep(c("Height", "Width"), each = nrow(dataSet))
)
# Overlay the two count distributions; transparency keeps both visible.
ggplot(data, aes(x = Dimensions_in_cm, fill = Screen)) +
  geom_bar(position = "identity", alpha = .6)

library(ggplot2)
library(gridExtra)
# Set the figure size before drawing; setting it afterwards cannot affect
# the plot that was already rendered.
fig(24, 20)
# price_range is read as an integer. Convert it to a factor so that it is
# treated as four discrete classes (one box / one colour per class) rather
# than as a continuous variable. Converting an existing factor is a no-op.
dataSet$price_range <- as.factor(dataSet$price_range)
# Two continuous variables: a scatter plot shows their relationship, whereas
# a boxplot needs a discrete x axis.
p1 <- ggplot(dataSet, aes(x = px_width, y = px_height, color = price_range)) +
  geom_point(alpha = 0.6) +
  labs(title = "Pixel Resolution Height vs Pixel Resolution Width")
# Distribution of RAM within each price class; outliers drawn as red stars.
p2 <- ggplot(dataSet, aes(x = price_range, y = ram, color = price_range)) +
  geom_boxplot(
    outlier.colour = "red", outlier.shape = 8,
    outlier.size = 4
  ) +
  labs(title = "RAM vs Price Range")
# Show both plots side by side.
grid.arrange(p1, p2, nrow = 1)
library(ggplot2)
library(gridExtra)
# Treat the target as four discrete classes so each gets its own box.
dataSet$price_range <- as.factor(dataSet$price_range)
# Set the figure size before drawing; setting it afterwards cannot affect
# the plot that was already rendered.
fig(24, 20)
# Distribution of internal memory within each price class.
p3 <- ggplot(dataSet, aes(x = price_range, y = int_memory, color = price_range)) +
  geom_boxplot(
    outlier.colour = "red", outlier.shape = 8,
    outlier.size = 4
  ) +
  labs(title = "int_memory vs Price Range")
# Distribution of battery power within each price class.
p4 <- ggplot(dataSet, aes(x = price_range, y = battery_power, color = price_range)) +
  geom_boxplot(
    outlier.colour = "red", outlier.shape = 8,
    outlier.size = 4
  ) +
  labs(title = "Battery power vs Price Range")
# Show both plots side by side.
grid.arrange(p3, p4, nrow = 1)
library(ggplot2)
# Scatter plots of RAM against another feature, coloured by price class,
# with one ellipse drawn per class by stat_ellipse().
p <- ggplot(dataSet, aes(battery_power, ram, color = price_range)) +
  geom_point()
p + stat_ellipse()

p <- ggplot(dataSet, aes(int_memory, ram, color = price_range)) +
  geom_point()
p + stat_ellipse()

# The original mapped `ram` to both axes, which only draws the diagonal and
# gives stat_ellipse() perfectly collinear data. px_width is used here as
# the third feature.
# NOTE(review): swap in another column if a different comparison was intended.
p <- ggplot(dataSet, aes(x = px_width, y = ram, color = price_range)) +
  geom_point()
p + stat_ellipse()


library(tidyverse)
library(plotly)

# Build the interactive 3-D scatter plot: battery power, RAM and internal
# memory on the three axes, one colour per price class.
# Columns are referenced with formulas (~col) so they are looked up in the
# data frame passed to plot_ly() instead of re-indexing dataSet each time.
p <- plot_ly(
  dataSet,
  x = ~battery_power, y = ~ram, z = ~int_memory,
  color = ~price_range
) %>%
  add_markers(size = 1) %>%
  layout(
    scene = list(
      xaxis = list(title = "Battery Power"),
      yaxis = list(title = "Ram"),
      zaxis = list(title = "Memoire interne")
    )
  )
p
LS0tCnRpdGxlOiAiQW5hbHlzZSBEYXRhIFNldCBNb2JpbGUgUHJpY2UiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCkluaXRpYWxpc2F0aW9uIGV0IGltcG9ydCBkZXMgZG9ubsOpZXMKCmBgYHtyfQppbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQppbnN0YWxsLnBhY2thZ2VzKCJwYWNtYW4iKQppbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpCmluc3RhbGwucGFja2FnZXMoInBsb3RseSIpCgpsaWJyYXJ5KHRpZHl2ZXJzZSkKCmZpZyA8LSBmdW5jdGlvbih3aWR0aCwgaGVpZ3RoKXsKICAgICBvcHRpb25zKHJlcHIucGxvdC53aWR0aCA9IHdpZHRoLCByZXByLnBsb3QuaGVpZ2h0ID0gaGVpZ3RoKQp9CgpsaWJyYXJ5KHBhY21hbikKCnBhY21hbjo6cF9sb2FkKHBhY21hbixkcGx5ciwgZ2dwbG90MiwgcmlvLCBncmlkRXh0cmEsIHNjYWxlcywgZ2djb3JycGxvdCwgY2FyZXQsIGUxMDcxKQoKZGF0YVNldCA8LSByZWFkLmNzdignZGF0YS90cmFpbi5jc3YnKQpgYGAKCgpgYGB7cn0KbmFtZXMoZGF0YVNldCkKYGBgCgpBZmZpY2hhZ2UgZHUgc2V0IGRlIGRvbm7DqWVzLgoKQ29sb25uZXMgOiBiYXR0ZXJ5X3Bvd2VyLCBibHVlLCBjbG9ja19zcGVlZCwgZHVhbF9zaW0sIGZjLCBmb3VyX2csaW50X21lbW9yeSwgbV9kZXAsIG1vYmlsZV93dCwgbl9jb3JlcywgcGMsIHB4X2hlaWdodCwgcHhfd2lkdGgsIHJhbSwgc2NfaCwgc2NfdywgdGFsa190aW1lLCB0aHJlZV9nLCB0b3VjaF9zY3JlZW4sIHdpZmksIHByaWNlX3JhbmdlCgpiYXR0ZXJ5X3Bvd2VyOlRvdGFsIGVuZXJneSBhIGJhdHRlcnkgY2FuIHN0b3JlIGluIG9uZSB0aW1lIG1lYXN1cmVkIGluIG1BaApibHVlOkhhcyBibHVldG9vdGggb3Igbm90CmNsb2NrX3NwZWVkOnNwZWVkIGF0IHdoaWNoIG1pY3JvcHJvY2Vzc29yIGV4ZWN1dGVzIGluc3RydWN0aW9ucwpkdWFsX3NpbTpIYXMgZHVhbCBzaW0gc3VwcG9ydCBvciBub3QKZmM6RnJvbnQgQ2FtZXJhIG1lZ2EgcGl4ZWxzCmZvdXJfZzpIYXMgNEcgb3Igbm90CmludF9tZW1vcnk6SW50ZXJuYWwgTWVtb3J5IGluIEdpZ2FieXRlcwptX2RlcDpNb2JpbGUgRGVwdGggaW4gY20KbW9iaWxlX3d0OldlaWdodCBvZiBtb2JpbGUgcGhvbmUKbl9jb3JlczpOdW1iZXIgb2YgY29yZXMgb2YgcHJvY2Vzc29yCnBjOlByaW1hcnkgQ2FtZXJhIG1lZ2EgcGl4ZWxzCnB4X2hlaWdodDpQaXhlbCBSZXNvbHV0aW9uIEhlaWdodApweF93aWR0aDpQaXhlbCBSZXNvbHV0aW9uIFdpZHRoCnJhbTpSYW5kb20gQWNjZXNzIE1lbW9yeSBpbiBNZWdhYnl0ZXMKc2NfaDpTY3JlZW4gSGVpZ2h0IG9mIG1vYmlsZSBpbiBjbQpzY193OlNjcmVlbiBXaWR0aCBvZiBtb2JpbGUgaW4gY20KdGFsa190aW1lOmxvbmdlc3QgdGltZSB0aGF0IGEgc2luZ2xlIGJhdHRlcnkgY2hhcmdlIHdpbGwgbGFzdCB3aGVuIHlvdSBhcmUKdGhyZWVfZzpIYXMgM0cgb3Igbm90CnRvdWNoX3NjcmVlbjpIYXMgdG91Y2ggc2NyZWVuIG9yIG5vdAp3aWZpOkhhcyB3aWZpIG9yIG5vdApw
cmljZV9yYW5nZTogVGhpcyBpcyB0aGUgdGFyZ2V0IHZhcmlhYmxlIHdpdGggdmFsdWUgb2YgMChsb3cgY29zdCksIDEobWVkaXVtIGNvc3QpLCAyKGhpZ2ggY29zdCkgYW5kIDModmVyeSBoaWdoIGNvc3QpLgoKYGBge3J9CmRpbShkYXRhU2V0KQpjbGFzcyhkYXRhU2V0KQpoZWFkKGRhdGFTZXQpCnNhcHBseShkYXRhU2V0LCBjbGFzcykKYGBgCgoKYGBge3J9CnN1bW1hcnkoZGF0YVNldCkKYGBgCgoKYGBge3J9CmxpYnJhcnkoZ2djb3JycGxvdCkKY29yciA8LSByb3VuZChjb3IoZGF0YVNldCksIDgpCmdnY29ycnBsb3QoY29ycikKZmlnKDE4LCAxNikKYGBgCgoKYGBge3J9CnN0cihkYXRhU2V0KQpgYGAKCmBgYHtyfQpwcm9wLnRhYmxlKHRhYmxlKGRhdGFTZXQkYmx1ZSkpICMgY2VsbCBwZXJjZW50YWdlcwpwcm9wLnRhYmxlKHRhYmxlKGRhdGFTZXQkZHVhbF9zaW0pKSAjIGNlbGwgcGVyY2VudGFnZXMKcHJvcC50YWJsZSh0YWJsZShkYXRhU2V0JGZvdXJfZykpICMgY2VsbCBwZXJjZW50YWdlcwpwcm9wLnRhYmxlKHRhYmxlKGRhdGFTZXQkdGhyZWVfZykpICMgY2VsbCBwZXJjZW50YWdlcwpwcm9wLnRhYmxlKHRhYmxlKGRhdGFTZXQkdG91Y2hfc2NyZWVuKSkgIyBjZWxsIHBlcmNlbnRhZ2VzCnByb3AudGFibGUodGFibGUoZGF0YVNldCR3aWZpKSkgIyBjZWxsIHBlcmNlbnRhZ2VzCmBgYAoKClN1YnBsb3RzIHVzaW5nIGZpbHRlcmVkIGRhdGFzZXQKYGBge3J9CmxpYnJhcnkoZ2dwbG90MikKZGF0YSA9IGRhdGEuZnJhbWUoRGltZW5zaW9uc19pbl9jbSA9IGMoZGF0YVNldCRzY19oLCBkYXRhU2V0JHNjX3cpLCAKICAgICAgICAgICAgICAgU2NyZWVuID0gcmVwKGMoIkhlaWdodCIsICJXaWR0aCIpLCBjKGxlbmd0aChkYXRhU2V0JHNjX2gpLCBsZW5ndGgoZGF0YVNldCRzY193KSkpKQpnZ3Bsb3QoZGF0YSwgYWVzKERpbWVuc2lvbnNfaW5fY20sIGZpbGwgPSBTY3JlZW4pKSArIAogIGdlb21fYmFyKHBvc2l0aW9uID0gJ2lkZW50aXR5JywgYWxwaGEgPSAuNikKYGBgCgoKYGBge3J9CmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShncmlkRXh0cmEpCmRhdGFTZXQkcHJpY2VfcmFuZ2UgPC0gYXMuZmFjdG9yKGRhdGFTZXQkcHJpY2VfcmFuZ2UpCnAxIDwtICBnZ3Bsb3QoZGF0YVNldCwgYWVzKHg9cHhfd2lkdGgsIHkgPSBweF9oZWlnaHQsIGNvbG9yPXByaWNlX3JhbmdlKSkgKwogIGdlb21fYm94cGxvdChvdXRsaWVyLmNvbG91cj0icmVkIiwgb3V0bGllci5zaGFwZT04LAogICAgICAgICAgICAgICBvdXRsaWVyLnNpemU9NCkgKwogIGxhYnModGl0bGUgPSAiUGl4ZWwgUmVzb2x1dGlvbiBIZWlnaHQgdnMgUGl4ZWwgUmVzb2x1dGlvbiBXaWR0aCIpCnAyIDwtIGdncGxvdChkYXRhU2V0LCBhZXMoeD1wcmljZV9yYW5nZSwgeSA9IHJhbSwgY29sb3I9cHJpY2VfcmFuZ2UpKSArCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuY29sb3VyPSJyZWQiLCBvdXRsaWVyLnNoYXBlPTgsCiAgICAgICAgICAgICAgIG91dGxpZXIuc2l6ZT00KSArCiAg
bGFicyh0aXRsZSA9ICJSQU0gdnMgUHJpY2UgUmFuZ2UiKQpncmlkLmFycmFuZ2UocDEsIHAyLG5yb3cgPSAxKQpmaWcoMjQsIDIwKQpgYGAKCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGdyaWRFeHRyYSkKZGF0YVNldCRwcmljZV9yYW5nZSA8LSBhcy5mYWN0b3IoZGF0YVNldCRwcmljZV9yYW5nZSkKcDMgPC0gZ2dwbG90KGRhdGFTZXQsIGFlcyh4PXByaWNlX3JhbmdlLCB5ID0gaW50X21lbW9yeSwgY29sb3I9cHJpY2VfcmFuZ2UpKSArCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuY29sb3VyPSJyZWQiLCBvdXRsaWVyLnNoYXBlPTgsCiAgICAgICAgICAgICAgIG91dGxpZXIuc2l6ZT00KSArCiAgbGFicyh0aXRsZSA9ICJpbnRfbWVtb3J5IHZzIFByaWNlIFJhbmdlIikKcDQgPC0gZ2dwbG90KGRhdGFTZXQsIGFlcyh4PXByaWNlX3JhbmdlLCB5ID0gYmF0dGVyeV9wb3dlciwgY29sb3I9cHJpY2VfcmFuZ2UpKSArCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuY29sb3VyPSJyZWQiLCBvdXRsaWVyLnNoYXBlPTgsCiAgICAgICAgICAgICAgIG91dGxpZXIuc2l6ZT00KSArCiAgbGFicyh0aXRsZSA9ICJCYXR0ZXJ5IHBvd2VyIHZzIFByaWNlIFJhbmdlIikKZ3JpZC5hcnJhbmdlKHAzLCBwNCxucm93ID0gMSkKZmlnKDI0LCAyMCkKYGBgCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpwIDwtIGdncGxvdChkYXRhU2V0LCBhZXMoYmF0dGVyeV9wb3dlciwgcmFtLCBjb2xvciA9IHByaWNlX3JhbmdlKSkrCiAgZ2VvbV9wb2ludCgpCnAgKyBzdGF0X2VsbGlwc2UoKQpwIDwtIGdncGxvdChkYXRhU2V0LCBhZXMoaW50X21lbW9yeSwgcmFtLCBjb2xvciA9IHByaWNlX3JhbmdlKSkrCiAgZ2VvbV9wb2ludCgpCnAgKyBzdGF0X2VsbGlwc2UoKQpwIDwtIGdncGxvdChkYXRhU2V0LCBhZXMoeCA9IHJhbSwgeSA9IHJhbSwgY29sb3IgPSBwcmljZV9yYW5nZSkpKwogIGdlb21fcG9pbnQoKQpwICsgc3RhdF9lbGxpcHNlKCkKCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHBsb3RseSkKCiMgQ3LDqWVyIGxlIGdyYXBoaXF1ZQpwIDwtIHBsb3RfbHkoCiAgZGF0YVNldCwgeCA9IGRhdGFTZXQkYmF0dGVyeV9wb3dlciwgeSA9IGRhdGFTZXQkcmFtLCB6ID0gZGF0YVNldCRpbnRfbWVtb3J5LCAKICBjb2xvciA9IGRhdGFTZXQkcHJpY2VfcmFuZ2UpICU+JQogIGFkZF9tYXJrZXJzKHNpemU9MSkgJT4lCiAgbGF5b3V0KAogICAgc2NlbmUgPSBsaXN0KHhheGlzID0gbGlzdCh0aXRsZSA9ICdCYXR0ZXJ5IFBvd2VyJyksCiAgICAgICAgeWF4aXMgPSBsaXN0KHRpdGxlID0gJ1JhbScpLAogICAgICAgIHpheGlzID0gbGlzdCh0aXRsZSA9ICdNZW1vaXJlIGludGVybmUnKSkKICAgICAgICApCnAKYGBgCgo=